#https://data.cdc.gov/NCHS/Weekly-Counts-of-Deaths-by-State-and-Select-Causes/muzy-jte6/data

# loading packages
library(tidyverse)
library(RSocrata)
library(hablar)
library(geofacet)
library(lubridate)
library(janitor)


weeks <- read_csv("Data/weeks.csv")
weeks <- weeks %>% 
  mutate(start_date=dmy(start_date)-1,
         end_date=dmy(end_date)-1)
  

# importing cdc cause specific death data for 2014-2018, grabbing columns for cause, omitting flag columns. Full list here: https://dev.socrata.com/foundry/data.cdc.gov/3yf8-kanr
causes_1418 <- read_csv(
  "https://data.cdc.gov/api/views/3yf8-kanr/rows.csv?accessType=DOWNLOAD") %>% 
  data.frame() %>% 
  clean_names() %>% 
  select(jurisdiction_of_occurrence,
         mmwryear=mmwr_year, mmwrweek=mmwr_week, weekendingdate=week_ending_date,
         allcause=all_cause,naturalcause=natural_cause,
         malignant_neoplasms_c00_c97, alzheimer_disease_g30,
         chronic_lower_respiratory=chronic_lower_respiratory_diseases_j40_j47 , symptoms_signs_and_abnormal=symptoms_signs_and_abnormal_clinical_and_laboratory_findings_not_elsewhere_classified_r00_r99,
         diseases_of_heart_i00_i09=diseases_of_heart_i00_i09_i11_i13_i20_i51, cerebrovascular_diseases=cerebrovascular_diseases_i60_i69 ,
         septicemia_a40_a41, diabetes_mellitus_e10_e14,
         influenza_and_pneumonia_j10=influenza_and_pneumonia_j10_j18,
         other_diseases_of_respiratory=other_diseases_of_respiratory_system_j00_j06_j30_j39_j67_j70_j98,
         nephritis_nephrotic_syndrome=nephritis_nephrotic_syndrome_and_nephrosis_n00_n07_n17_n19_n25_n27) %>% 
  mutate(weekendingdate=mdy(weekendingdate),
        covid19_multiple_causes_of_death=NA,
         covid19_underlying_cause_of_death=NA,
         mmwryear=as.numeric(mmwryear),
         mmwrweek=as.numeric(mmwrweek))

# importing cdc cause specific death data for 2019-2020
#causes_1920 <- read.socrata(
#  "https://data.cdc.gov/resource/muzy-jte6.json?$select=jurisdiction_of_occurrence,mmwryear,mmwrweek,weekendingdate,allcause,naturalcause,septicemia_a40_a41,malignant_neoplasms_c00_c97,diabetes_mellitus_e10_e14,alzheimer_disease_g30,influenza_and_pneumonia_j10,chronic_lower_respiratory,other_diseases_of_respiratory,nephritis_nephrotic_syndrome,symptoms_signs_and_abnormal,diseases_of_heart_i00_i09,cerebrovascular_diseases"
#)

#cause_names <- colnames(causes_1418)
library(stringr)
library(lubridate)
library(jsonlite)
library(janitor)

causes_1920 <- read_csv("https://data.cdc.gov/api/views/muzy-jte6/rows.csv?accessType=DOWNLOAD") %>% 
  data.frame() %>% 
  clean_names() %>% 
  select(jurisdiction_of_occurrence,
         mmwryear=mmwr_year, mmwrweek=mmwr_week, weekendingdate=week_ending_date,
         allcause=all_cause,naturalcause=natural_cause,
         malignant_neoplasms_c00_c97, alzheimer_disease_g30,
         chronic_lower_respiratory=chronic_lower_respiratory_diseases_j40_j47 , symptoms_signs_and_abnormal=symptoms_signs_and_abnormal_clinical_and_laboratory_findings_not_elsewhere_classified_r00_r99,
         diseases_of_heart_i00_i09=diseases_of_heart_i00_i09_i11_i13_i20_i51, cerebrovascular_diseases=cerebrovascular_diseases_i60_i69 ,
         septicemia_a40_a41, diabetes_mellitus_e10_e14,
         influenza_and_pneumonia_j10=influenza_and_pneumonia_j10_j18,
         other_diseases_of_respiratory=other_diseases_of_respiratory_system_j00_j06_j30_j39_j67_j70_j98,
         nephritis_nephrotic_syndrome=nephritis_nephrotic_syndrome_and_nephrosis_n00_n07_n17_n19_n25_n27,
         covid19_multiple_causes_of_death=covid_19_u071_multiple_cause_of_death,
         covid19_underlying_cause_of_death=covid_19_u071_underlying_cause_of_death) %>% 
  mutate(weekendingdate=mdy(weekendingdate))

# append the 19-20 data to the older data
causes_1420 <- bind_rows(causes_1920, causes_1418)

# using hablar package's 'convert' to convert cause of death columns from character type to doubles to perform calculations
causes_1420 <- causes_1420 %>%
  convert(dbl(allcause,
              naturalcause,
              septicemia_a40_a41,
              malignant_neoplasms_c00_c97,
              diabetes_mellitus_e10_e14,
              alzheimer_disease_g30,
              influenza_and_pneumonia_j10,
              chronic_lower_respiratory,
              other_diseases_of_respiratory,
              nephritis_nephrotic_syndrome,
              symptoms_signs_and_abnormal,
              diseases_of_heart_i00_i09,
              cerebrovascular_diseases,
              covid19_multiple_causes_of_death,
              covid19_underlying_cause_of_death
  ))
# calculate unnatural causes of death
causes_1420 <- causes_1420 %>%
  mutate(unnaturalcause = allcause - naturalcause)
# dropping Puerto Rico to count only jurisdictions in 50 states, per CDC guidance
causes_1420 <- causes_1420 %>%
  filter(jurisdiction_of_occurrence != "Puerto Rico")
# summing states and grouping data by week
causes_us_1420 <- causes_1420 %>%
  group_by(weekendingdate) %>%
  summarize(allcause = sum(allcause, na.rm=TRUE),
            naturalcause = sum(naturalcause, na.rm=TRUE),
            unnaturalcause = sum(unnaturalcause, na.rm=TRUE),
            septicemia_a40_a41 = sum(septicemia_a40_a41, na.rm=TRUE),
            malignant_neoplasms_c00_c97 = sum(malignant_neoplasms_c00_c97, na.rm=TRUE),
            diabetes_mellitus_e10_e14 = sum(diabetes_mellitus_e10_e14, na.rm=TRUE),
            alzheimer_disease_g30 = sum(alzheimer_disease_g30, na.rm=TRUE),
            influenza_and_pneumonia_j10 = sum(influenza_and_pneumonia_j10, na.rm=TRUE),
            chronic_lower_respiratory = sum(chronic_lower_respiratory, na.rm=TRUE),
            other_diseases_of_respiratory = sum(other_diseases_of_respiratory, na.rm=TRUE),
            nephritis_nephrotic_syndrome = sum(nephritis_nephrotic_syndrome, na.rm=TRUE),
            symptoms_signs_and_abnormal = sum(symptoms_signs_and_abnormal, na.rm=TRUE),
            diseases_of_heart_i00_i09 = sum(diseases_of_heart_i00_i09,na.rm=TRUE),
            cerebrovascular_diseases = sum(cerebrovascular_diseases, na.rm=TRUE),
            covid19_multiple_causes_of_death=sum(covid19_multiple_causes_of_death, na.rm=T),
            covid19_underlying_cause_of_death=sum(covid19_underlying_cause_of_death, na.rm=T)) %>%
  mutate(naturalcause_perc = round(naturalcause/allcause,3),
         unnaturalcause_perc = round(unnaturalcause/allcause,3),
         septicemia_perc = round(septicemia_a40_a41/allcause,3),
         malignant_neoplasms_perc = round(malignant_neoplasms_c00_c97/allcause,3),
         diabetes_mellitus_perc = round(diabetes_mellitus_e10_e14/allcause,3),
         alzheimer_perc = round(alzheimer_disease_g30/allcause,3),
         influenza_pneumonia_perc = round(influenza_and_pneumonia_j10/allcause,3),
         chronic_lower_respiratory_perc = round(chronic_lower_respiratory/allcause,3),
         other_respiratory_diseases_perc = round(other_diseases_of_respiratory/allcause,3),
         nephritis_nephrotic_syndrome_perc = round(nephritis_nephrotic_syndrome/allcause,3),
         symptoms_signs_and_abnormal_perc = round(symptoms_signs_and_abnormal/allcause,3),
         heart_disease_perc = round(diseases_of_heart_i00_i09/allcause,3),
         cerebrovascular_diseases_perc = round(cerebrovascular_diseases/allcause,3),
         covid19_multiple_causes_of_death_perc=round(covid19_multiple_causes_of_death/allcause,3),
         covid19_underlying_cause_of_death_perc=round(covid19_underlying_cause_of_death/allcause,3)
  )
# dropping most recent weeks where data are unreliable
causes_us_1420 <- causes_us_1420 %>%
  filter(weekendingdate != '2020-05-09',
         weekendingdate != '2020-05-02',
         weekendingdate != '2020-04-25')#,
         #weekendingdate != '2020-04-18')


de_tall <- causes_us_1420  %>% 
  pivot_longer(cols=2:ncol(causes_us_1420 ),
               names_to="type",
               values_to="count") %>% 
  left_join(weeks, by=c("weekendingdate"="end_date"))

de_tall20 <- de_tall %>% 
  filter(SEASON=="2019-20") %>% 
  filter(!grepl("_perc", type))

de_tall <- de_tall %>% 
  filter(SEASON!="2019-20") %>% 
  filter(!grepl("_perc", type))


ggplot() +
  geom_line(data=de_tall, aes(x=WEEK, y=count, group=SEASON), color="grey", alpha=.7) +
  geom_line(data=de_tall20, aes(x=WEEK, y=count, group=SEASON), color="red") +
  facet_wrap(~type, scale="free_y") +
  labs(title="Specific causes of death")

de_tall <- causes_us_1420  %>% 
  pivot_longer(cols=2:ncol(causes_us_1420 ),
               names_to="type",
               values_to="count") %>% 
  left_join(weeks, by=c("weekendingdate"="end_date"))

de_tall20 <- de_tall %>% 
  filter(SEASON=="2019-20") %>% 
  filter(grepl("_perc", type))

de_tall <- de_tall %>% 
  filter(SEASON!="2019-20") %>% 
  filter(grepl("_perc", type))


ggplot() +
  geom_line(data=de_tall, aes(x=WEEK, y=count, group=SEASON), color="grey", alpha=.7) +
  geom_line(data=de_tall20, aes(x=WEEK, y=count, group=SEASON), color="red") +
  facet_wrap(~type) +
  labs(title="Specific causes of death by percent")

ggplot() +
  geom_line(data=de_tall, aes(x=WEEK, y=count, group=SEASON), color="grey", alpha=.7) +
  geom_line(data=de_tall20, aes(x=WEEK, y=count, group=SEASON), color="red") +
  facet_wrap(~type, scale="free_y") +
  labs(title="Specific causes of death by percent (unscaled)")

weeks <- read_csv("Data/weeks.csv")
weeks <- weeks %>% 
  mutate(start_date=dmy(start_date)-1,
         end_date=dmy(end_date)-1)

state.abb <- c(state.abb, "DC")
state.name <- c(state.name, "District of Columbia")
state_names <- data.frame(state.abb, state.name)


causes_1420_long <- causes_1420 %>%
  filter(weekendingdate != '2020-05-09',
         weekendingdate != '2020-05-02',
         weekendingdate != '2020-04-25') %>% #,
         #weekendingdate != '2020-04-18')
  mutate(covid19_multiple_causes_of_death=as.numeric(covid19_multiple_causes_of_death),
         covid19_underlying_cause_of_death=as.numeric(covid19_underlying_cause_of_death)) %>% 
  ungroup() %>% 
  pivot_longer(cols=5:ncol(causes_1420),
               names_to="type",
               values_to="count") %>% 
  mutate(weekendingdate=ymd(weekendingdate)) %>% 
  left_join(weeks, by=c("weekendingdate"="end_date")) %>% 
  filter(WEEK <=15) %>% 
  left_join(state_names, by=c("jurisdiction_of_occurrence"= "state.name"))


causes_1420_long20 <- causes_1420_long %>% 
  filter(SEASON=="2019-20")
causes_1420_long <- causes_1420_long %>% 
  filter(SEASON!="2019-20")

list_of_causes <- causes_1420_long %>% 
  pull(type) %>% unique()

for (i in 1:length(list_of_causes)) {

df20 <- causes_1420_long20 %>% 
  filter(type==list_of_causes[i]) %>% 
  filter(!is.na(state.abb))

df <- causes_1420_long %>% 
  filter(type==list_of_causes[i]) %>% 
  filter(!is.na(state.abb))

plotted <- ggplot(df20) +
 geom_line(data=df, aes(x=WEEK, y=count, group=SEASON), color="grey", alpha=.7) +
  geom_line(aes(x=WEEK, y=count), color="red") +
  facet_geo(~state.abb, grid="us_state_grid1", scale="free_y")+
  labs(title=list_of_causes[i])

print(plotted)
}

causes_1420_long <- causes_1420 %>%
  filter(weekendingdate != '2020-05-09',
         weekendingdate != '2020-05-02',
         weekendingdate != '2020-04-25') %>% #,
         #weekendingdate != '2020-04-18')
  mutate(covid19_multiple_causes_of_death=as.numeric(covid19_multiple_causes_of_death),
         covid19_underlying_cause_of_death=as.numeric(covid19_underlying_cause_of_death)) %>% 
  group_by(jurisdiction_of_occurrence, weekendingdate) %>%
  summarize(allcause = sum(allcause, na.rm=TRUE),
            naturalcause = sum(naturalcause, na.rm=TRUE),
            unnaturalcause = sum(unnaturalcause, na.rm=TRUE),
            septicemia_a40_a41 = sum(septicemia_a40_a41, na.rm=TRUE),
            malignant_neoplasms_c00_c97 = sum(malignant_neoplasms_c00_c97, na.rm=TRUE),
            diabetes_mellitus_e10_e14 = sum(diabetes_mellitus_e10_e14, na.rm=TRUE),
            alzheimer_disease_g30 = sum(alzheimer_disease_g30, na.rm=TRUE),
            influenza_and_pneumonia_j10 = sum(influenza_and_pneumonia_j10, na.rm=TRUE),
            chronic_lower_respiratory = sum(chronic_lower_respiratory, na.rm=TRUE),
            other_diseases_of_respiratory = sum(other_diseases_of_respiratory, na.rm=TRUE),
            nephritis_nephrotic_syndrome = sum(nephritis_nephrotic_syndrome, na.rm=TRUE),
            symptoms_signs_and_abnormal = sum(symptoms_signs_and_abnormal, na.rm=TRUE),
            diseases_of_heart_i00_i09 = sum(diseases_of_heart_i00_i09,na.rm=TRUE),
            cerebrovascular_diseases = sum(cerebrovascular_diseases, na.rm=TRUE),
            covid19_multiple_causes_of_death=sum(covid19_multiple_causes_of_death, na.rm=T),
            covid19_underlying_cause_of_death=sum(covid19_underlying_cause_of_death, na.rm=T)) %>%
  mutate(naturalcause_perc = round(naturalcause/allcause,3),
         unnaturalcause_perc = round(unnaturalcause/allcause,3),
         septicemia_perc = round(septicemia_a40_a41/allcause,3),
         malignant_neoplasms_perc = round(malignant_neoplasms_c00_c97/allcause,3),
         diabetes_mellitus_perc = round(diabetes_mellitus_e10_e14/allcause,3),
         alzheimer_perc = round(alzheimer_disease_g30/allcause,3),
         influenza_pneumonia_perc = round(influenza_and_pneumonia_j10/allcause,3),
         chronic_lower_respiratory_perc = round(chronic_lower_respiratory/allcause,3),
         other_respiratory_diseases_perc = round(other_diseases_of_respiratory/allcause,3),
         nephritis_nephrotic_syndrome_perc = round(nephritis_nephrotic_syndrome/allcause,3),
         symptoms_signs_and_abnormal_perc = round(symptoms_signs_and_abnormal/allcause,3),
         heart_disease_perc = round(diseases_of_heart_i00_i09/allcause,3),
         cerebrovascular_diseases_perc = round(cerebrovascular_diseases/allcause,3),
         covid19_multiple_causes_of_death_perc=round(covid19_multiple_causes_of_death/allcause,3),
         covid19_underlying_cause_of_death_perc=round(covid19_underlying_cause_of_death/allcause,3)
  ) %>% 
  ungroup() 

causes_1420_long <- causes_1420_long %>% 
  pivot_longer(cols=3:ncol(causes_1420_long ),
               names_to="type",
               values_to="count") %>% 
  mutate(weekendingdate=ymd(weekendingdate)) %>% 
  left_join(weeks, by=c("weekendingdate"="end_date")) %>% 
  filter(WEEK <=15) %>% 
  left_join(state_names, by=c("jurisdiction_of_occurrence"= "state.name")) %>% 
  filter(grepl("_perc", type))


causes_1420_long20 <- causes_1420_long %>% 
  filter(SEASON=="2019-20")
causes_1420_long <- causes_1420_long %>% 
  filter(SEASON!="2019-20")

list_of_causes <- causes_1420_long %>% 
  pull(type) %>% unique()

for (i in 1:length(list_of_causes)) {

df20 <- causes_1420_long20 %>% 
  filter(type==list_of_causes[i]) %>% 
  filter(!is.na(state.abb))

df <- causes_1420_long %>% 
  filter(type==list_of_causes[i]) %>% 
  filter(!is.na(state.abb))

plotted <- ggplot(df20) +
 geom_line(data=df, aes(x=WEEK, y=count, group=SEASON), color="grey", alpha=.7) +
  geom_line(aes(x=WEEK, y=count), color="red") +
  facet_geo(~state.abb, grid="us_state_grid1", scale="free_y")+
  labs(title=list_of_causes[i])

print(plotted)
}